
# data uitls
pacman::p_load(data.table, rio, tidyverse)
#  panel / factor model packages
pacman::p_load(synthdid, ebal)
set.seed(42)
if (Sys.info()[['user']] == "alal") {
  root = "/home/alal/Dropbox/1_Research/ElecAdminFunding/ElecAdminFundingReplication"
} else {
  root = "~/Dropbox/ElecAdminFunding/ElecAdminFundingReplication"
}

# Load code for a synth diff-in-diff estimator that
# uses entropy balancing unit weights
source(file.path(root, "code/estimation/_estimators.R"))

# Make a function to compute the number of treated counties,
# total counties, and observations from a dataset
n_table = function(data, years){
  treated = nrow(distinct(filter(data, treated==1), fips))
  counties = nrow(distinct(filter(data), fips))
  obs = nrow(data)*years
  return(c(treated, counties, obs))
}

# Load and clean the analysis file
raw = rio::import(file.path(root, "/modified_data/ctcl.dta")) %>% setDT
raw[, treated := ifelse((amount > 0) & policy_year == 2020, 1, 0)]
raw[policy_year == 2020, .N, treated]
raw[, ever_treated := max(treated), fips]
raw[, lvap := log(vap2020)]

# Make a wide version of the analysis file with years of 
# dem vote share along the columns rather than the rows
demvs_wide = dcast(raw[, .(fips, policy_year, vs_dem_pres)],
  fips ~ policy_year,
  value.var = "vs_dem_pres"
) %>%
  set_colnames(c("fips", paste0("vs_dem_", seq(1992, 2020, 4)))) %>%
  setkey('fips')

# Make a wide version of the analysis file with years of 
# turnout along the columns rather than the rows
turnout_wide = dcast(raw[, .(fips, policy_year, turnout_pres)],
  fips ~ policy_year,
  value.var = "turnout_pres") %>%
  set_colnames(c("fips", paste0("turnout_", seq(1992, 2020, 4)))) %>%
  setkey('fips')

# Set up a combined wide dataset with covariates and the 
# year-specific turnout and democratic vote share variables
addl_xs = c('lvap', 'share_white_nh', 'share_black_nh')
kv = c("fips", "treated", "amt_tercile", addl_xs)
wide_data = raw[policy_year == 2020, ..kv] %>%
  setkey('fips') %>%
  .[demvs_wide] %>%
  .[turnout_wide] %>%
  .[order(treated)]


##
# Democratic vote share analysis using synth diff-in-diff
# with entropy balancing unit weights
##

# Get a list of variables to include in the
# dem vote share analysis
kv_demvs = c("fips", "treated", addl_xs, 
  grep("vs_dem", colnames(wide_data), value = TRUE))

# Estimate the effect on dem vote share for the first tercile of
# grant size
t1_synth_dem_sdideb = wide_data[amt_tercile %in% c(0, 1), ..kv_demvs] %>%
  as_tibble %>%
  sdid_eb(t_c = 7, n_cov = length(addl_xs), intercept = TRUE, 1000)
t1_sdideb_n = n_table(filter(wide_data, amt_tercile %in% c(0, 1)), 7)

# Estimate the effect on dem vote share for the second tercile of
# grant size
t2_synth_dem_sdideb = wide_data[amt_tercile %in% c(0, 2), ..kv_demvs] %>%
  as_tibble %>%
  sdid_eb(t_c = 7, n_cov = length(addl_xs), intercept = TRUE, 1000)
t2_sdideb_n = n_table(filter(wide_data, amt_tercile %in% c(0, 2)), 7)

# Estimate the effect on dem vote share for the third tercile of
# grant size
t3_synth_dem_sdideb = wide_data[amt_tercile %in% c(0, 3), ..kv_demvs] %>%
  as_tibble %>%
  sdid_eb(t_c = 7, n_cov = length(addl_xs), intercept = TRUE, 1000)
t3_sdideb_n = n_table(filter(wide_data, amt_tercile %in% c(0, 3)), 7)


##
# Turnout analysis using synth diff-in-diff
# with entropy balancing unit weights
##

# Get a list of variables to include in the turnout analysis
kv_tur = c("fips", "treated", addl_xs, grep("turnout", colnames(wide_data), value = TRUE))

# Estimate the effect on turnout share for the first tercile of
# grant size
t1_synth_tur_sdideb = wide_data[amt_tercile %in% c(0, 1), ..kv_tur] %>%
  as_tibble %>%
  sdid_eb(t_c = 7, n_cov = length(addl_xs), intercept = TRUE, 1000)

# Estimate the effect on turnout share for the second tercile of
# grant size
t2_synth_tur_sdideb = wide_data[amt_tercile %in% c(0, 2), ..kv_tur] %>%
  as_tibble %>%
  sdid_eb(t_c = 7, n_cov = length(addl_xs), intercept = TRUE, 1000)

# Estimate the effect on turnout share for the third tercile of
# grant size
t3_synth_tur_sdideb = wide_data[amt_tercile %in% c(0, 3), ..kv_tur] %>%
  as_tibble %>%
  sdid_eb(t_c = 7, n_cov = length(addl_xs), intercept = TRUE, 1000)

# Save all of the output
save(
  t1_synth_dem_sdideb, t1_synth_tur_sdideb,
  t2_synth_dem_sdideb, t2_synth_tur_sdideb,
  t3_synth_dem_sdideb, t3_synth_tur_sdideb,
  t1_sdideb_n, t2_sdideb_n, t3_sdideb_n,
  file = file.path(root, "tmp/synth_expTercilesEB.RData")
)
